Google community mobility data is the open-source data reflecting the mobility changes across global communities in response to policies aimed at combating the COVID-19 crisis. This data set tracks daily movement trends into six categories, including retail and recreation, groceries and pharmacy, parks, transit stations, workplace, and residential. In this notebook, we demonstrate how to query google mobility data using the BigQuery API client python library, and visualize the mobility changes over time since February 2020.
Using the Google BigQuery API Client library requires authentication set up. Please follow the Instruction and download the a JSON key to your local computer.
Uncomment the line(by removing the #) to install python packages if needed.
#!pip install numpy
#!pip install pandas
#!pip install geopandas
#!pip install --upgrade 'google-cloud-bigquery[bqstorage,pandas]'
#!pip install plotly==4.14.3
#!pip install --upgrade google-cloud-bigquery
import pandas as pd
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import json
import os
import math
from google.cloud import bigquery
# Setup the Google application credential
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="<PATH_to_Google_JSON_key>"
# Create Client object for query purpose
client = bigquery.Client()
# Query for data description in google mobility data
table_sql="""
SELECT *
FROM `bigquery-public-data.covid19_google_mobility`.INFORMATION_SCHEMA.COLUMNS
"""
query_job = client.query(table_sql)
mobility_col_df = query_job.to_dataframe()
mobility_col_df[["column_name", "data_type"]]
# Query for mobility data (State level) of the US
sql_us="""
SELECT
country_region,
sub_region_1,
iso_3166_2_code,
date,
retail_and_recreation_percent_change_from_baseline as recreation,
grocery_and_pharmacy_percent_change_from_baseline as grocery,
parks_percent_change_from_baseline as park,
transit_stations_percent_change_from_baseline as transit,
workplaces_percent_change_from_baseline as workplace,
residential_percent_change_from_baseline as residential
FROM `bigquery-public-data.covid19_google_mobility.mobility_report`
WHERE
country_region_code = 'US' AND
sub_region_1 is not null AND
iso_3166_2_code is not null
order by sub_region_1, date
"""
us_query_job = client.query(sql_us)
us_mobility_df = us_query_job.to_dataframe()
us_mobility_df.describe()
# Change the format of "iso_3166_2_code" and "date"
us_mobility_df["iso_3166_2_code"]=us_mobility_df["iso_3166_2_code"].str.replace('US-', '', regex=True)
us_mobility_df = us_mobility_df.fillna(0).copy()
us_mobility_df['date'] = pd.to_datetime(us_mobility_df['date'])
us_mobility_df['date'] = us_mobility_df['date'].dt.strftime('%m/%d/%Y')
# Visualize the mobility changes at "Retail and Recreation" in the US over time
# To visualize the mobility changes in other categories, simply change the color="<category>"
us_fig =px.choropleth(us_mobility_df,
locations = 'iso_3166_2_code',
color="recreation",
animation_frame="date",
color_continuous_scale="Inferno",
locationmode='USA-states',
scope="usa",
range_color=(-100, 20),
labels={'recreation':'Retail and Recreation'}
)
us_fig.update_layout(height=650,
margin={"r":0,"t":100,"l":0,"b":0},
title = "Google Mobility Trend in the US: Changes in Retail and Recreation Visits Since Feb 2020",
title_font_size=20)
us_fig.show()
# Compare moblity trend between States
# Four States were selected for demonstration purpose, including California, Florida, Illinois, and New York
us_df = us_mobility_df.drop(['country_region', 'sub_region_1'], axis=1)
us_df_melted = pd.melt(us_df, id_vars=["iso_3166_2_code", "date"] ,
value_vars=["recreation","grocery","park","transit","workplace", "residential"],
var_name="location", value_name='mobility')
us_df_subset = us_df_melted[us_df_melted["iso_3166_2_code"].isin(["CA", "FL", "IL", "NY"])]
fig = px.line(us_df_subset, x= "date", y= "mobility",color="location",
facet_col="iso_3166_2_code", facet_col_wrap=1,
line_shape='linear', render_mode="svg", hover_name="location",
labels=dict(date="Date", mobility="Mobility (%)", location="Location"))
fig.update_layout(height=900,
width=800,
margin={"r":0,"t":100,"l":0,"b":0},
title = "Mobility Trend in Four States Since Feb 2020",
title_font_size=25)
fig.add_hline(y=0, line_dash="dot",
annotation_text="Baseline",
annotation_position="bottom right")
fig.update_yaxes(matches=None)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_xaxes(tickvals=["03/01/2020","05/01/2020","07/01/2020","09/01/2020","11/01/2020","01/01/2021"])
fig.add_vrect(x0="03/09/2020", x1="04/11/2020", col=1,
annotation_text="Decline", annotation_position="top left",
fillcolor="green", opacity=0.25, line_width=0)
fig.show()
Residential", across all four states;Groceries and Pharmacy" since the pandemic of COVID19;Park" mobility was observed during summer in both the states of Illinois and New York by comparing to their baseline calculated between Jan and Feb of 2020. However, the absence of mobility increase in "Park" in some states does not imply less mobility compared to other states, such as Illinois, because the absolute number of visits is not shared in Google mobility data table. To visualize the mobility data in county data on the map, we would need the county shape files, such as IL_BNDY_County_Py.shp. These files can be downloaded with this link. Download the file and decompress the zip file before you run the cell below.
# Add path of illinois shape file(IL_BNDY_County_py.shp) to the line below.
# This file can be found inside decompressed zip folder (IL_BNDY_County)
shape_f = "<PATH_to_IL_BNDY_County_py.shp>"
map_df = gpd.read_file(shape_f)
# Save the GeoJson file at local
map_df.to_file("IL_States_gpd.json", driver='GeoJSON')
# Load GeoJson file
with open("IL_States_gpd.json") as f:
illinois_state = json.load(f)
# Modify the fips id to align with the fips id in Google mobility table
def modify_fips(number):
newnum = '{:03d}'.format(number)
newnum = '17' + str(newnum)
return newnum
for i in range(len(illinois_state["features"])):
subdict = illinois_state["features"][i]
newfips = modify_fips(subdict["properties"]["CO_FIPS"])
illinois_state["features"][i]["properties"]["CO_FIPS"] = newfips
# Query county level monthly mobility data from Google Mobility data table
il_month_sql = """
SELECT
country_region_code,
sub_region_1,
sub_region_2,
census_fips_code,
EXTRACT (YEAR FROM date) as year,
EXTRACT (MONTH FROM date) as month,
ROUND(AVG(retail_and_recreation_percent_change_from_baseline),2) as recreation,
ROUND(AVG(grocery_and_pharmacy_percent_change_from_baseline),2) as grocery,
ROUND(AVG(parks_percent_change_from_baseline),2) as park,
ROUND(AVG(transit_stations_percent_change_from_baseline),2) as transit,
ROUND(AVG(workplaces_percent_change_from_baseline),2) as workplace,
ROUND(AVG(residential_percent_change_from_baseline),2) as residential
FROM `bigquery-public-data.covid19_google_mobility.mobility_report`
WHERE
country_region_code = 'US' AND
sub_region_1 = "Illinois" AND
census_fips_code is not null
GROUP BY country_region_code, sub_region_1, sub_region_2, census_fips_code, year, month
order by sub_region_1, sub_region_2, year, month
"""
query_job = client.query(il_month_sql)
il_month_df = query_job.to_dataframe()
il_month_df.fillna(0, inplace=True)
il_month_df.head()
# Create a new date format
il_month_df['date'] = il_month_df["year"].astype(str) + "-" + il_month_df["month"].astype(str)
# Visualize the residential mobility change in all counties of IL since Feb 2020
il_residential_df = il_month_df.drop(['recreation', 'grocery', 'park', 'transit', 'workplace'], axis=1)
il_residential_df['date'] = pd.to_datetime(il_residential_df['date'])
il_residential_df['date'] = il_residential_df['date'].dt.strftime('%m/%Y')
fig = px.choropleth_mapbox(il_residential_df, geojson=illinois_state,
locations='census_fips_code',
color='residential',
color_continuous_scale="redor",
range_color=(-1, 23),
featureidkey="properties.CO_FIPS",
mapbox_style="carto-positron",
opacity=0.6,
center = {"lat": 40, "lon": -89.3985},
zoom=5.7,
hover_name = "sub_region_2",
animation_frame='date')
fig.update_geos(fitbounds="locations",visible=False)
fig.update_layout(height=800,
width=750,
margin={"r":0,"t":50,"l":0,"b":0},
title = "Residential Mobility Trend in IL Since Feb 2020",
title_font_size=30)
fig.show()